home *** CD-ROM | disk | FTP | other *** search
/ Cream of the Crop 21 / Cream of the Crop 21 (Terry Blount) (October 1996).iso / sound / rsynth22.zip / ENGLISH.C < prev    next >
C/C++ Source or Header  |  1994-11-08  |  17KB  |  614 lines

  1. #include <config.h>
  2.  
  3. /* $Id: english.c,v 1.13 1994/11/08 13:30:50 a904209 Exp a904209 $
  4.  */
  5. char *english_id = "$Id: english.c,v 1.13 1994/11/08 13:30:50 a904209 Exp a904209 $";  /* revision identification string for this file; not static, so it has external linkage */
  6.  
  7. /*
  8.    **      English to Phoneme rules.
  9.    **
  10.    **      Derived from:
  11.    **
  12.    **           AUTOMATIC TRANSLATION OF ENGLISH TEXT TO PHONETICS
  13.    **                  BY MEANS OF LETTER-TO-SOUND RULES
  14.    **
  15.    **                      NRL Report 7948
  16.    **
  17.    **                    January 21st, 1976
  18.    **          Naval Research Laboratory, Washington, D.C.
  19.    **
  20.    **
  21.    **      Published by the National Technical Information Service as
  22.    **      document "AD/A021 929".
  23.    **
  24.    **
  25.    **
  26.    **      The Phoneme codes (the rule tables below mostly use other symbols, e.g. "@", "eI", "tS"):
  27.    **
  28.    **              IY      bEEt            IH      bIt
  29.    **              EY      gAte            EH      gEt
  30.    **              AE      fAt             AA      fAther
  31.    **              AO      lAWn            OW      lOne
  32.    **              UH      fUll            UW      fOOl
  33.    **              ER      mURdER          AX      About
  34.    **              AH      bUt             AY      hIde
  35.    **              AW      hOW             OY      tOY
  36.    **
  37.    **              p       Pack            b       Back
  38.    **              t       Time            d       Dime
  39.    **              k       Coat            g       Goat
  40.    **              f       Fault           v       Vault
  41.    **              TH      eTHer           DH      eiTHer
  42.    **              s       Sue             z       Zoo
  43.    **              SH      leaSH           ZH      leiSure
  44.    **              HH      How             m       suM
  45.    **              n       suN             NG      suNG
  46.    **              l       Laugh           w       Wear
  47.    **              y       Young           r       Rate
  48.    **              CH      CHar            j       Jar
  49.    **              WH      WHere
  50.    **
  51.    **
  52.    **      Rules are made up of four parts:
  53.    **
  54.    **              The left context.
  55.    **              The text to match.
  56.    **              The right context.
  57.    **              The phonemes to substitute for the matched text.
  58.    **
  59.    **      Procedure:
  60.    **
  61.    **              Separate each block of letters (apostrophes included)
  62.    **              and add a space on each side.  For each unmatched
  63.    **              letter in the word, look through the rules where the
  64.    **              text to match starts with the letter in the word.  If
  65.    **              the text to match is found and the right and left
  66.    **              context patterns also match, output the phonemes for
  67.    **              that rule and skip to the next unmatched letter.
  68.    **
  69.    **
  70.    **      Special Context Symbols:
  71.    **
  72.    **              #       One or more vowels
  73.    **              :       Zero or more consonants
  74.    **              ^       One consonant.
  75.    **              .       One of B, D, V, G, J, L, M, N, R, W or Z (voiced
  76.    **                      consonants)
  77.    **              %       One of ER, E, ES, ED, ING, ELY (a suffix)
  78.    **                      (Found in right context only)
  79.    **              +       One of E, I or Y (a "front" vowel)
  80.    **
  81.  */
  82.  
  83.  
  84. /* Context definitions */
  85. static char Anything[] = "";
  86.  /* Empty context pattern: the rule places no requirement on this side of the matched text */
  87.  
  88. static char Nothing[] = " ";
  89.  /* Single-space context pattern: beginning or end of word (per the header, each block of letters gets a space added on each side) */
  90.  
  91. static char Silent[] = "";
  92.  /* Empty output string: no phonemes.  NOTE(review): same contents as Anything but a separate object; whether any code tells them apart by address is not visible here */
  93.  
  94.  
  95. #define LEFT_PART       0   /* position of the left-context pattern in a rule row */
  96. #define MATCH_PART      1   /* position of the text to match */
  97. #define RIGHT_PART      2   /* position of the right-context pattern */
  98. #define OUT_PART        3   /* position of the phonemes substituted for the matched text */
  99.  
  100. typedef char *Rule[4];
  101.  /* Rule is an array of 4 character pointers; the table rows fill them in the order left context, text to match, right context, output phonemes (presumably indexed with the *_PART constants - the indexing code is not visible here) */
  102.  
  103.  
  104. /*0 = Punctuation */
  105. /*
  106.    **      LEFT_PART       MATCH_PART      RIGHT_PART      OUT_PART
  107.  */
  108.  
  109.  
  /*
     **      Rules for the space and punctuation characters.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      A space, comma, full stop, question mark or exclamation mark
     **      produces a space in the output; a hyphen or a bare apostrophe
     **      produces nothing; apostrophe-S produces "z" in the three left
     **      contexts listed.  The apostrophe-S rows precede the bare
     **      apostrophe row, so they are presumably tried first (the code that
     **      scans the table is not visible here).
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker; confirm against the code that walks the table.
   */
  110. static Rule punct_rules[] =
  111. {
  112.  {Anything, " ", Anything, " "},
  113.  {Anything, "-", Anything, ""},
  114.  {".", "'S", Anything, "z"},
  115.  {"#:.E", "'S", Anything, "z"},
  116.  {"#", "'S", Anything, "z"},
  117.  {Anything, "'", Anything, ""},
  118.  {Anything, ",", Anything, " "},
  119.  {Anything, ".", Anything, " "},
  120.  {Anything, "?", Anything, " "},
  121.  {Anything, "!", Anything, " "},
  122.  {Anything, 0, Anything, Silent},
  123. };
  124.  
  /*
     **      Rules whose text to match begins with the letter A.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      Context-dependent and multi-letter rules come first; the last
     **      real row matches a single A with no context requirement on either
     **      side and outputs "&", so it acts as the fallback.  Several rows
     **      differ only in context (for example the AR and ARR rows), so the
     **      row order is significant if rules are tried top to bottom -
     **      presumably they are; the scanning code is not visible here.
     **
     **      NOTE(review): the output strings ("@", "0r", "eI", "IdZ", ...) are
     **      not the two-letter phoneme codes listed in the file header; the
     **      alphabet they belong to is not stated in this file - confirm.
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker.
   */
  125. static Rule A_rules[] =
  126. {
  127.  {Anything, "A", Nothing, "@"},
  128.  {Nothing, "ARE", Nothing, "0r"},
  129.  {Nothing, "AR", "O", "@r"},
  130.  {Anything, "AR", "#", "er"},
  131.  {"^", "AS", "#", "eIs"},
  132.  {Anything, "A", "WA", "@"},
  133.  {Anything, "AW", Anything, "O"},
  134.  {" :", "ANY", Anything, "eni"},
  135.  {Anything, "A", "^+#", "eI"},
  136.  {"#:", "ALLY", Anything, "@li"},
  137.  {Nothing, "AL", "#", "@l"},
  138.  {Anything, "AGAIN", Anything, "@gen"},
  139.  {"#:", "AG", "E", "IdZ"},
  140.  {Anything, "A", "^+:#", "&"},
  141.  {" :", "A", "^+ ", "eI"},
  142.  {Anything, "A", "^%", "eI"},
  143.  {Nothing, "ARR", Anything, "@r"},
  144.  {Anything, "ARR", Anything, "&r"},
  145.  {" :", "AR", Nothing, "0r"},
  146.  {Anything, "AR", Nothing, "3"},
  147.  {Anything, "AR", Anything, "0r"},
  148.  {Anything, "AIR", Anything, "er"},
  149.  {Anything, "AI", Anything, "eI"},
  150.  {Anything, "AY", Anything, "eI"},
  151.  {Anything, "AU", Anything, "O"},
  152.  {"#:", "AL", Nothing, "@l"},
  153.  {"#:", "ALS", Nothing, "@lz"},
  154.  {Anything, "ALK", Anything, "Ok"},
  155.  {Anything, "AL", "^", "Ol"},
  156.  {" :", "ABLE", Anything, "eIb@l"},
  157.  {Anything, "ABLE", Anything, "@b@l"},
  158.  {Anything, "ANG", "+", "eIndZ"},
  159.  {"^", "A", "^#", "eI"},
  160.  {Anything, "A", Anything, "&"},
  161.  {Anything, 0, Anything, Silent},
  162. };
  163.  
  /*
     **      Rules whose text to match begins with the letter B.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      Five special cases (BE at the start of a word before a consonant
     **      and a vowel, BEING, the whole word BOTH, BUS at the start of a word
     **      before a vowel, BUIL) are followed by the fallback row, which maps
     **      a single B in any context to "b".
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker; the code that walks the table is not visible here.
   */
  164. static Rule B_rules[] =
  165. {
  166.  {Nothing, "BE", "^#", "bI"},
  167.  {Anything, "BEING", Anything, "biIN"},
  168.  {Nothing, "BOTH", Nothing, "b@UT"},
  169.  {Nothing, "BUS", "#", "bIz"},
  170.  {Anything, "BUIL", Anything, "bIl"},
  171.  {Anything, "B", Anything, "b"},
  172.  {Anything, 0, Anything, Silent},
  173. };
  174.  
  /*
     **      Rules whose text to match begins with the letter C.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      CH is "k" at the start of a word before a consonant, or after a
     **      consonant followed by E, and "tS" otherwise.  CI before A, O or EN
     **      is "S".  A single C before a front vowel (the + symbol: E, I or Y)
     **      is "s"; the fallback row maps any other C to "k".
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker; the code that walks the table is not visible here.
   */
  175. static Rule C_rules[] =
  176. {
  177.  {Nothing, "CH", "^", "k"},
  178.  {"^E", "CH", Anything, "k"},
  179.  {Anything, "CH", Anything, "tS"},
  180.  {" S", "CI", "#", "saI"},
  181.  {Anything, "CI", "A", "S"},
  182.  {Anything, "CI", "O", "S"},
  183.  {Anything, "CI", "EN", "S"},
  184.  {Anything, "C", "+", "s"},
  185.  {Anything, "CK", Anything, "k"},
  186.  {Anything, "COM", "%", "kVm"},
  187.  {Anything, "C", Anything, "k"},
  188.  {Anything, 0, Anything, Silent},
  189. };
  190.  
  /*
     **      Rules whose text to match begins with the letter D.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      The first three rows handle a D at the end of a word: DED gives
     **      "dId", a D after a voiced consonant (the . symbol) and E gives
     **      "d", and a D after vowels, optional consonants, one consonant and
     **      E gives "t".  The voiced-consonant row comes before the
     **      any-consonant row, so it presumably takes precedence (the scanning
     **      code is not visible here).  The fallback row maps any other D to
     **      "d".
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker.
   */
  191. static Rule D_rules[] =
  192. {
  193.  {"#:", "DED", Nothing, "dId"},
  194.  {".E", "D", Nothing, "d"},
  195.  {"#:^E", "D", Nothing, "t"},
  196.  {Nothing, "DE", "^#", "dI"},
  197.  {Nothing, "DO", Nothing, "du"},
  198.  {Nothing, "DOES", Anything, "dVz"},
  199.  {Nothing, "DOING", Anything, "duIN"},
  200.  {Nothing, "DOW", Anything, "daU"},
  201.  {Anything, "DU", "A", "dZu"},
  202.  {Anything, "D", Anything, "d"},
  203.  {Anything, 0, Anything, Silent},
  204. };
  205.  
  /*
     **      Rules whose text to match begins with the letter E.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      Main groups, in table order:
     **        - E at the end of a word: silent after vowels and optional
     **          consonants, "i" when only consonants precede it in the word.
     **        - EW: "u" after T, S, R, D, L, Z, N, J, TH, CH or SH, otherwise
     **          "ju".
     **        - ES at the end of a word after S, C, G, Z, X, J, CH or SH: "Iz".
     **        - suffix-like forms (ELY, EMENT, EFUL) and vowel pairs (EE, EA,
     **          EI, EY, EU).
     **        - fallback: any other single E gives "e".
     **
     **      Many rows share the same text to match and differ only in context,
     **      so row order matters if rules are tried top to bottom - presumably
     **      they are; the scanning code is not visible here.
     **
     **      NOTE(review): the left context of the second row starts with an
     **      apostrophe, which is not one of the special context symbols listed
     **      in the file header; presumably it is matched literally - confirm.
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker.
   */
  206. static Rule E_rules[] =
  207. {
  208.  {"#:", "E", Nothing, ""},
  209.  {"':^", "E", Nothing, ""},
  210.  {" :", "E", Nothing, "i"},
  211.  {"#", "ED", Nothing, "d"},
  212.  {"#:", "E", "D ", ""},
  213.  {Anything, "EV", "ER", "ev"},
  214.  {Anything, "E", "^%", "i"},
  215.  {Anything, "ERI", "#", "iri"},
  216.  {Anything, "ERI", Anything, "erI"},
  217.  {"#:", "ER", "#", "3"},
  218.  {Anything, "ER", "#", "er"},
  219.  {Anything, "ER", Anything, "3"},
  220.  {Nothing, "EVEN", Anything, "iven"},
  221.  {"#:", "E", "W", ""},
  222.  {"T", "EW", Anything, "u"},
  223.  {"S", "EW", Anything, "u"},
  224.  {"R", "EW", Anything, "u"},
  225.  {"D", "EW", Anything, "u"},
  226.  {"L", "EW", Anything, "u"},
  227.  {"Z", "EW", Anything, "u"},
  228.  {"N", "EW", Anything, "u"},
  229.  {"J", "EW", Anything, "u"},
  230.  {"TH", "EW", Anything, "u"},
  231.  {"CH", "EW", Anything, "u"},
  232.  {"SH", "EW", Anything, "u"},
  233.  {Anything, "EW", Anything, "ju"},
  234.  {Anything, "E", "O", "i"},
  235.  {"#:S", "ES", Nothing, "Iz"},
  236.  {"#:C", "ES", Nothing, "Iz"},
  237.  {"#:G", "ES", Nothing, "Iz"},
  238.  {"#:Z", "ES", Nothing, "Iz"},
  239.  {"#:X", "ES", Nothing, "Iz"},
  240.  {"#:J", "ES", Nothing, "Iz"},
  241.  {"#:CH", "ES", Nothing, "Iz"},
  242.  {"#:SH", "ES", Nothing, "Iz"},
  243.  {"#:", "E", "S ", ""},
  244.  {"#:", "ELY", Nothing, "li"},
  245.  {"#:", "EMENT", Anything, "ment"},
  246.  {Anything, "EFUL", Anything, "fUl"},
  247.  {Anything, "EE", Anything, "i"},
  248.  {Anything, "EARN", Anything, "3n"},
  249.  {Nothing, "EAR", "^", "3"},
  250.  {Anything, "EAD", Anything, "ed"},
  251.  {"#:", "EA", Nothing, "i@"},
  252.  {Anything, "EA", "SU", "e"},
  253.  {Anything, "EA", Anything, "i"},
  254.  {Anything, "EIGH", Anything, "eI"},
  255.  {Anything, "EI", Anything, "i"},
  256.  {Nothing, "EYE", Anything, "aI"},
  257.  {Anything, "EY", Anything, "i"},
  258.  {Anything, "EU", Anything, "ju"},
  259.  {Anything, "E", Anything, "e"},
  260.  {Anything, 0, Anything, Silent},
  261. };
  262.  
  /*
     **      Rules whose text to match begins with the letter F.  Each row is
     **      { left context, text to match, right context, output phonemes }.
     **
     **      FUL in any context gives "fUl"; the fallback row maps any other F
     **      to "f".
     **
     **      The last row has a null MATCH_PART - presumably the end-of-table
     **      marker; the code that walks the table is not visible here.
   */
  263. static Rule F_rules[] =
  264. {
  265.  {Anything, "FUL", Anything, "fUl"},
  266.  {Anything, "F", Anything, "f"},
  267.  {Anything, 0, Anything, Silent},
  268. };
  269.  
  270. static Rule G_rules[] =
  271. {
  272.  {Anything, "GIV", Anything, "gIv"},
  273.  {Nothing, "G", "I^", "g"},
  274.  {Anything, "GE", "T", "ge"},
  275.  {"SU"